-
Notifications
You must be signed in to change notification settings - Fork 15.8k
[RISCV][llvm] Support logical comparison codegen for P extension #174626
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
The result type of the P extension's comparison instructions is the same as the operand type, and each result element is either all 1s or all 0s, so we need to set ZeroOrNegativeOneBooleanContent so that sext(setcc) is combined automatically.
|
@llvm/pr-subscribers-backend-risc-v Author: Brandon Wu (4vtomat) Changes: The result type of the P extension's comparison instructions is the same as that of the operands. Patch is 30.55 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/174626.diff 4 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index d6b62736bdf60..43598eec5137d 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -546,6 +546,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::SHL, ISD::SRL, ISD::SRA}, VTs, Custom);
setOperationAction(ISD::BITCAST, VTs, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VTs, Custom);
+ setOperationAction(ISD::SETCC, VTs, Legal);
+ // P extension vector comparisons produce all 1s for true, all 0s for false
+ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
}
if (Subtarget.hasStdExtZfbfmin()) {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index 6830b476f5cd3..7a68707336050 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -1584,6 +1584,54 @@ let Predicates = [HasStdExtP] in {
// // splat pattern
def: Pat<(XLenVecI8VT (splat_vector (XLenVT GPR:$rs2))), (PADD_BS (XLenVT X0), GPR:$rs2)>;
def: Pat<(XLenVecI16VT (splat_vector (XLenVT GPR:$rs2))), (PADD_HS (XLenVT X0), GPR:$rs2)>;
+
+ // 8/16-bit comparison patterns (result is all 1s or all 0s per element)
+ // a == b
+ def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs1), (XLenVecI8VT GPR:$rs2), SETEQ)),
+ (PMSEQ_B GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs1), (XLenVecI16VT GPR:$rs2), SETEQ)),
+ (PMSEQ_H GPR:$rs1, GPR:$rs2)>;
+ // a != b => !(a == b)
+ def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs1), (XLenVecI8VT GPR:$rs2), SETNE)),
+ (XORI (PMSEQ_B GPR:$rs1, GPR:$rs2), -1)>;
+ def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs1), (XLenVecI16VT GPR:$rs2), SETNE)),
+ (XORI (PMSEQ_H GPR:$rs1, GPR:$rs2), -1)>;
+ // a < b
+ def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs1), (XLenVecI8VT GPR:$rs2), SETLT)),
+ (PMSLT_B GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs1), (XLenVecI8VT GPR:$rs2), SETULT)),
+ (PMSLTU_B GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs1), (XLenVecI16VT GPR:$rs2), SETLT)),
+ (PMSLT_H GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs1), (XLenVecI16VT GPR:$rs2), SETULT)),
+ (PMSLTU_H GPR:$rs1, GPR:$rs2)>;
+ // a <= b => !(b < a)
+ def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs1), (XLenVecI8VT GPR:$rs2), SETLE)),
+ (XORI (PMSLT_B GPR:$rs2, GPR:$rs1), -1)>;
+ def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs1), (XLenVecI8VT GPR:$rs2), SETULE)),
+ (XORI (PMSLTU_B GPR:$rs2, GPR:$rs1), -1)>;
+ def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs1), (XLenVecI16VT GPR:$rs2), SETLE)),
+ (XORI (PMSLT_H GPR:$rs2, GPR:$rs1), -1)>;
+ def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs1), (XLenVecI16VT GPR:$rs2), SETULE)),
+ (XORI (PMSLTU_H GPR:$rs2, GPR:$rs1), -1)>;
+ // a > b => b < a
+ def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs1), (XLenVecI8VT GPR:$rs2), SETGT)),
+ (PMSLT_B GPR:$rs2, GPR:$rs1)>;
+ def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs1), (XLenVecI8VT GPR:$rs2), SETUGT)),
+ (PMSLTU_B GPR:$rs2, GPR:$rs1)>;
+ def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs1), (XLenVecI16VT GPR:$rs2), SETGT)),
+ (PMSLT_H GPR:$rs2, GPR:$rs1)>;
+ def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs1), (XLenVecI16VT GPR:$rs2), SETUGT)),
+ (PMSLTU_H GPR:$rs2, GPR:$rs1)>;
+ // a >= b => !(a < b)
+ def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs1), (XLenVecI8VT GPR:$rs2), SETGE)),
+ (XORI (PMSLT_B GPR:$rs1, GPR:$rs2), -1)>;
+ def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs1), (XLenVecI8VT GPR:$rs2), SETUGE)),
+ (XORI (PMSLTU_B GPR:$rs1, GPR:$rs2), -1)>;
+ def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs1), (XLenVecI16VT GPR:$rs2), SETGE)),
+ (XORI (PMSLT_H GPR:$rs1, GPR:$rs2), -1)>;
+ def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs1), (XLenVecI16VT GPR:$rs2), SETUGE)),
+ (XORI (PMSLTU_H GPR:$rs1, GPR:$rs2), -1)>;
} // Predicates = [HasStdExtP]
let Predicates = [HasStdExtP, IsRV32] in {
@@ -1643,6 +1691,34 @@ let Predicates = [HasStdExtP, IsRV64] in {
// splat pattern
def: Pat<(v2i32 (splat_vector (XLenVT GPR:$rs2))), (PADD_WS (XLenVT X0), GPR:$rs2)>;
+ // 32-bit comparison patterns (result is all 1s or all 0s per element)
+ // a == b
+ def: Pat<(v2i32 (setcc (v2i32 GPR:$rs1), (v2i32 GPR:$rs2), SETEQ)),
+ (PMSEQ_W GPR:$rs1, GPR:$rs2)>;
+ // a != b => !(a == b)
+ def: Pat<(v2i32 (setcc (v2i32 GPR:$rs1), (v2i32 GPR:$rs2), SETNE)),
+ (XORI (PMSEQ_W GPR:$rs1, GPR:$rs2), -1)>;
+ // a < b
+ def: Pat<(v2i32 (setcc (v2i32 GPR:$rs1), (v2i32 GPR:$rs2), SETLT)),
+ (PMSLT_W GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v2i32 (setcc (v2i32 GPR:$rs1), (v2i32 GPR:$rs2), SETULT)),
+ (PMSLTU_W GPR:$rs1, GPR:$rs2)>;
+ // a <= b => !(b < a)
+ def: Pat<(v2i32 (setcc (v2i32 GPR:$rs1), (v2i32 GPR:$rs2), SETLE)),
+ (XORI (PMSLT_W GPR:$rs2, GPR:$rs1), -1)>;
+ def: Pat<(v2i32 (setcc (v2i32 GPR:$rs1), (v2i32 GPR:$rs2), SETULE)),
+ (XORI (PMSLTU_W GPR:$rs2, GPR:$rs1), -1)>;
+ // a > b => b < a
+ def: Pat<(v2i32 (setcc (v2i32 GPR:$rs1), (v2i32 GPR:$rs2), SETGT)),
+ (PMSLT_W GPR:$rs2, GPR:$rs1)>;
+ def: Pat<(v2i32 (setcc (v2i32 GPR:$rs1), (v2i32 GPR:$rs2), SETUGT)),
+ (PMSLTU_W GPR:$rs2, GPR:$rs1)>;
+ // a >= b => !(a < b)
+ def: Pat<(v2i32 (setcc (v2i32 GPR:$rs1), (v2i32 GPR:$rs2), SETGE)),
+ (XORI (PMSLT_W GPR:$rs1, GPR:$rs2), -1)>;
+ def: Pat<(v2i32 (setcc (v2i32 GPR:$rs1), (v2i32 GPR:$rs2), SETUGE)),
+ (XORI (PMSLTU_W GPR:$rs1, GPR:$rs2), -1)>;
+
// 32-bit logical shift left/right patterns
def: Pat<(v2i32 (shl GPR:$rs1, (v2i32 (splat_vector uimm5:$shamt)))),
(PSLLI_W GPR:$rs1, uimm5:$shamt)>;
diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
index 2836cda16b6d9..a1728e72ce3b9 100644
--- a/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
@@ -1118,3 +1118,335 @@ define void @test_pmulhsu_h_commuted(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
store <2 x i16> %res, ptr %ret_ptr
ret void
}
+
+; Comparison operations for v2i16
+define void @test_eq_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_eq_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmseq.h a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, ptr %a_ptr
+ %b = load <2 x i16>, ptr %b_ptr
+ %cmp = icmp eq <2 x i16> %a, %b
+ %res = sext <2 x i1> %cmp to <2 x i16>
+ store <2 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_ne_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ne_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmseq.h a1, a1, a2
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, ptr %a_ptr
+ %b = load <2 x i16>, ptr %b_ptr
+ %cmp = icmp ne <2 x i16> %a, %b
+ %res = sext <2 x i1> %cmp to <2 x i16>
+ store <2 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_slt_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_slt_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmslt.h a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, ptr %a_ptr
+ %b = load <2 x i16>, ptr %b_ptr
+ %cmp = icmp slt <2 x i16> %a, %b
+ %res = sext <2 x i1> %cmp to <2 x i16>
+ store <2 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_sle_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_sle_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmslt.h a1, a2, a1
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, ptr %a_ptr
+ %b = load <2 x i16>, ptr %b_ptr
+ %cmp = icmp sle <2 x i16> %a, %b
+ %res = sext <2 x i1> %cmp to <2 x i16>
+ store <2 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_sgt_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_sgt_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmslt.h a1, a2, a1
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, ptr %a_ptr
+ %b = load <2 x i16>, ptr %b_ptr
+ %cmp = icmp sgt <2 x i16> %a, %b
+ %res = sext <2 x i1> %cmp to <2 x i16>
+ store <2 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_sge_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_sge_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmslt.h a1, a1, a2
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, ptr %a_ptr
+ %b = load <2 x i16>, ptr %b_ptr
+ %cmp = icmp sge <2 x i16> %a, %b
+ %res = sext <2 x i1> %cmp to <2 x i16>
+ store <2 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_ult_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ult_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmsltu.h a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, ptr %a_ptr
+ %b = load <2 x i16>, ptr %b_ptr
+ %cmp = icmp ult <2 x i16> %a, %b
+ %res = sext <2 x i1> %cmp to <2 x i16>
+ store <2 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_ule_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ule_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmsltu.h a1, a2, a1
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, ptr %a_ptr
+ %b = load <2 x i16>, ptr %b_ptr
+ %cmp = icmp ule <2 x i16> %a, %b
+ %res = sext <2 x i1> %cmp to <2 x i16>
+ store <2 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_ugt_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ugt_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmsltu.h a1, a2, a1
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, ptr %a_ptr
+ %b = load <2 x i16>, ptr %b_ptr
+ %cmp = icmp ugt <2 x i16> %a, %b
+ %res = sext <2 x i1> %cmp to <2 x i16>
+ store <2 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_uge_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_uge_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmsltu.h a1, a1, a2
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, ptr %a_ptr
+ %b = load <2 x i16>, ptr %b_ptr
+ %cmp = icmp uge <2 x i16> %a, %b
+ %res = sext <2 x i1> %cmp to <2 x i16>
+ store <2 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+; Comparison operations for v4i8
+define void @test_eq_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_eq_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmseq.b a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, ptr %a_ptr
+ %b = load <4 x i8>, ptr %b_ptr
+ %cmp = icmp eq <4 x i8> %a, %b
+ %res = sext <4 x i1> %cmp to <4 x i8>
+ store <4 x i8> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_ne_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ne_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmseq.b a1, a1, a2
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, ptr %a_ptr
+ %b = load <4 x i8>, ptr %b_ptr
+ %cmp = icmp ne <4 x i8> %a, %b
+ %res = sext <4 x i1> %cmp to <4 x i8>
+ store <4 x i8> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_slt_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_slt_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmslt.b a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, ptr %a_ptr
+ %b = load <4 x i8>, ptr %b_ptr
+ %cmp = icmp slt <4 x i8> %a, %b
+ %res = sext <4 x i1> %cmp to <4 x i8>
+ store <4 x i8> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_sle_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_sle_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmslt.b a1, a2, a1
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, ptr %a_ptr
+ %b = load <4 x i8>, ptr %b_ptr
+ %cmp = icmp sle <4 x i8> %a, %b
+ %res = sext <4 x i1> %cmp to <4 x i8>
+ store <4 x i8> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_sgt_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_sgt_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmslt.b a1, a2, a1
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, ptr %a_ptr
+ %b = load <4 x i8>, ptr %b_ptr
+ %cmp = icmp sgt <4 x i8> %a, %b
+ %res = sext <4 x i1> %cmp to <4 x i8>
+ store <4 x i8> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_sge_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_sge_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmslt.b a1, a1, a2
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, ptr %a_ptr
+ %b = load <4 x i8>, ptr %b_ptr
+ %cmp = icmp sge <4 x i8> %a, %b
+ %res = sext <4 x i1> %cmp to <4 x i8>
+ store <4 x i8> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_ult_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ult_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmsltu.b a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, ptr %a_ptr
+ %b = load <4 x i8>, ptr %b_ptr
+ %cmp = icmp ult <4 x i8> %a, %b
+ %res = sext <4 x i1> %cmp to <4 x i8>
+ store <4 x i8> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_ule_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ule_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmsltu.b a1, a2, a1
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, ptr %a_ptr
+ %b = load <4 x i8>, ptr %b_ptr
+ %cmp = icmp ule <4 x i8> %a, %b
+ %res = sext <4 x i1> %cmp to <4 x i8>
+ store <4 x i8> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_ugt_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ugt_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmsltu.b a1, a2, a1
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, ptr %a_ptr
+ %b = load <4 x i8>, ptr %b_ptr
+ %cmp = icmp ugt <4 x i8> %a, %b
+ %res = sext <4 x i1> %cmp to <4 x i8>
+ store <4 x i8> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_uge_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_uge_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmsltu.b a1, a1, a2
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, ptr %a_ptr
+ %b = load <4 x i8>, ptr %b_ptr
+ %cmp = icmp uge <4 x i8> %a, %b
+ %res = sext <4 x i1> %cmp to <4 x i8>
+ store <4 x i8> %res, ptr %ret_ptr
+ ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
index dfa1b242e656f..e08871b4e63bf 100644
--- a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
@@ -1148,3 +1148,501 @@ define void @test_pmulhsu_w_commuted(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
store <2 x i32> %res, ptr %ret_ptr
ret void
}
+
+; Comparison operations for v4i16
+define void @test_eq_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_eq_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pmseq.h a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, ptr %a_ptr
+ %b = load <4 x i16>, ptr %b_ptr
+ %cmp = icmp eq <4 x i16> %a, %b
+ %res = sext <4 x i1> %cmp to <4 x i16>
+ store <4 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_ne_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ne_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pmseq.h a1, a1, a2
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, ptr %a_ptr
+ %b = load <4 x i16>, ptr %b_ptr
+ %cmp = icmp ne <4 x i16> %a, %b
+ %res = sext <4 x i1> %cmp to <4 x i16>
+ store <4 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_slt_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_slt_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pmslt.h a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, ptr %a_ptr
+ %b = load <4 x i16>, ptr %b_ptr
+ %cmp = icmp slt <4 x i16> %a, %b
+ %res = sext <4 x i1> %cmp to <4 x i16>
+ store <4 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_sle_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_sle_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pmslt.h a1, a2, a1
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, ptr %a_ptr
+ %b = load <4 x i16>, ptr %b_ptr
+ %cmp = icmp sle <4 x i16> %a, %b
+ %res = sext <4 x i1> %cmp to <4 x i16>
+ store <4 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_sgt_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_sgt_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pmslt.h a1, a2, a1
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, ptr %a_ptr
+ %b = load <4 x i16>, ptr %b_ptr
+ %cmp = icmp sgt <4 x i16> %a, %b
+ %res = sext <4 x i1> %cmp to <4 x i16>
+ store <4 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_sge_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_sge_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pmslt.h a1, a1, a2
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, ptr %a_ptr
+ %b = load <4 x i16>, ptr %b_ptr
+ %cmp = icmp sge <4 x i16> %a, %b
+ %res = sext <4 x i1> %cmp to <4 x i16>
+ store <4 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_ult_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ult_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pmsltu.h a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, ptr %a_ptr
+ %b = load <4 x i16>, ptr %b_ptr
+ %cmp = icmp ult <4 x i16> %a, %b
+ %res = sext <4 x i1> %cmp to <4 x i16>
+ store <4 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_ule_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ule_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pmsltu.h a1, a2, a1
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, ptr %a_ptr
+ %b = load <4 x i16>, ptr %b_ptr
+ %cmp = icmp ule <4 x i16> %a, %b
+ %res = sext <4 x i1> %cmp to <4 x...
[truncated]
|
| (PMSLTU_H GPR:$rs1, GPR:$rs2)>; | ||
| // a <= b => !(b < a) | ||
| def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs1), (XLenVecI8VT GPR:$rs2), SETLE)), | ||
| (XORI (PMSLT_B GPR:$rs2, GPR:$rs1), -1)>; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The rs1 and rs2 names belong to the instruction. They should always be (PMSLT_B GPR:$rs1, GPR:$rs2). The input pattern is where the swap should occur.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Uh I see
🪟 Windows x64 Test Results
✅ The build succeeded and all tests passed. |
topperc
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
topperc
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/190/builds/34072 Here is the relevant piece of the build log for reference: |
…m#174626) Result type of P extension's comparison instructions is same as operands and the result bits are all 1s or 0s so we need to set ZeroOrNegativeOneBooleanContent to make sext(setcc) auto combined.
The result type of the P extension's comparison instructions is the same as
the operand type, and each result element is either all 1s or all 0s, so we
need to set ZeroOrNegativeOneBooleanContent so that sext(setcc) is combined automatically.